From our exploratory analysis, we know that the tissue of origin (PB, lymph node or bone marrow) is a major driver of transcriptional variability. Thus, we will only work with clinical samples that come from the same niche.
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- here::here("results/R_objects/4.seurat_leukemic.rds")
path_to_save <- here::here("results/R_objects/5.seurat_list_clustered.rds")
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "mediumorchid2", "coral2", "blueviolet",
"indianred4", "deepskyblue1", "dimgray", "deeppink1",
"green", "lightgray", "hotpink1")
# Source functions
source(here::here("bin/utils.R"))
seurat <- readRDS(path_to_obj)
Let us start by visualizing our initial embedding:
DimPlot(seurat, group.by = "tissue")
We will know split the cells into its donor of origin and check the tissue of origin across samples:
seurat_list <- SplitObject(seurat, split.by = "donor_id")
purrr::walk(names(seurat_list), function(x) {
print(x)
print(table(seurat_list[[x]]$sample_description_FN, seurat_list[[x]]$tissue))
})
## [1] "12"
##
## PB
## posttreatment_2 1759
## posttreatment_3 2689
## pretreatment 5543
## pretreatment_progression 488
## RS 1417
## [1] "19"
##
## PB
## posttreatment 3348
## posttreatment_2 1672
## posttreatment_3 1194
## pretreatment 969
## RS 2372
## [1] "3299"
##
## BM PB
## posttreatment 4500 0
## posttreatment_2 1307 0
## posttreatment_3 739 0
## RS 0 2901
## [1] "365"
##
## LN PB
## posttreatment 0 4959
## RS 1232 0
table(seurat_list[["3299"]]$sample_description_FN, seurat_list[["3299"]]$tissue)
##
## BM PB
## posttreatment 4500 0
## posttreatment_2 1307 0
## posttreatment_3 739 0
## RS 0 2901
The samples from donor 365 are clearly confounded by tissue. In other words, in case of observing differences between time-points, we would not know if they come from differences in niche or clinical stage. Thus, we will rule it out for now.
On the other hand, the RS sample from donor 3299 comes from PB, whilst the others come from BM. However, since we know that Ferran could assign 80 to 90% of the cells in post-treatment_3 to Richter cells, we will work with them and exclude the RS sample.
seurat_list <- seurat_list[c("12", "19", "3299")]
seurat_list$`3299`
## An object of class Seurat
## 23403 features across 9447 samples within 1 assay
## Active assay: RNA (23403 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
seurat_list$`3299` <- subset(
seurat_list$`3299`,
subset = sample_description_FN != "RS"
)
seurat_list$`3299`
## An object of class Seurat
## 23403 features across 6546 samples within 1 assay
## Active assay: RNA (23403 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
seurat_list <- purrr::map(seurat_list, process_seurat, dims = 1:20)
umaps_subproject <- purrr::map(seurat_list, DimPlot, group.by = "subproject")
umaps_subproject
## $`12`
##
## $`19`
##
## $`3299`
As we can see, there is a large batch effect between the two experiments we conducted BCLLATLAS_10 and BCLLATLAS_29. As we know, in BCLLATLAS_10 we obtain single-cell transcriptomes for serial clinical samples of 4 donors. However, as we observed that some points were underrepresented (had less cells), we performed scRNA-seq of specific cases (BCLLATLAS_29).
Thus, we will initially work with BCLLATLAS_10, and use BCLLATLAS_29 later as a validation.
seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
seurat_obj <- subset(seurat_obj, subset = subproject == "BCLLATLAS_10")
seurat_obj <- process_seurat(seurat_obj, dims = 1:20)
seurat_obj
})
purrr::map(seurat_list, DimPlot)
## $`12`
##
## $`19`
##
## $`3299`
Interestingly, we see subset of cells between Richter-like and CLL-like cells that have a high mitochondrial content
vars_of_interest <- c("pct_mt", "INPP5D", "MIR155HG", "CHD2")
feature_plots <- purrr::map(vars_of_interest, function(x) {
FeaturePlot(seurat_list$`19`, x) +
scale_colour_viridis_c(option = "inferno")
})
umap_richter <- DimPlot(seurat_list$`19`, group.by = "is_richter")
umap_richter
feature_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
resolutions <- c(0.1, 0.25, 0.3, 0.4, 0.5)
seurat_list <- purrr::map(
seurat_list,
FindNeighbors,
reduction = "pca",
dims = 1:20
)
seurat_list <- purrr::map(seurat_list, FindClusters, resolution = resolutions)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 219940
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9400
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 219940
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8940
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 219940
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8811
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 219940
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8560
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 219940
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8329
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9047
## Number of edges: 302623
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9460
## Number of communities: 5
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9047
## Number of edges: 302623
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8891
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9047
## Number of edges: 302623
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8758
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9047
## Number of edges: 302623
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8490
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9047
## Number of edges: 302623
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8247
## Number of communities: 8
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6546
## Number of edges: 232688
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9411
## Number of communities: 4
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6546
## Number of edges: 232688
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8822
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6546
## Number of edges: 232688
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8687
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6546
## Number of edges: 232688
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8431
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6546
## Number of edges: 232688
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8185
## Number of communities: 7
## Elapsed time: 0 seconds
# Chosen resolutions
umap_clusters_12 <- DimPlot(
seurat_list$`12`,
group.by = "RNA_snn_res.0.25",
cols = color_palette
)
Idents(seurat_list$`12`) <- "RNA_snn_res.0.25"
umap_clusters_19 <- DimPlot(
seurat_list$`19`,
group.by = "RNA_snn_res.0.25",
cols = color_palette
)
Idents(seurat_list$`19`) <- "RNA_snn_res.0.25"
umap_clusters_3299 <- DimPlot(
seurat_list$`3299`,
group.by = "RNA_snn_res.0.4",
cols = color_palette
)
Idents(seurat_list$`3299`) <- "RNA_snn_res.0.4"
umap_clusters_12
umap_clusters_19
umap_clusters_3299
FeatureScatter(
seurat_list$`19`,
feature1 = "nFeature_RNA",
feature2 = "pct_mt",
group.by = "RNA_snn_res.0.25"
)
saveRDS(seurat_list, path_to_save)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.0 ggplot2_3.3.3 tidyverse_1.3.0 harmony_1.0 Rcpp_1.0.6 SeuratWrappers_0.3.0 SeuratObject_4.0.0 Seurat_4.0.1 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.4 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2 magrittr_2.0.1 tensor_1.5 cluster_2.1.1 ROCR_1.0-11 remotes_2.2.0 globals_0.14.0 modelr_0.1.8 matrixStats_0.58.0 spatstat.sparse_2.0-0 colorspace_2.0-0 rvest_1.0.0 ggrepel_0.9.1 haven_2.3.1 xfun_0.22 crayon_1.4.1 jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-10 zoo_1.8-9 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.7 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1 viridisLite_0.3.0 xtable_1.8-4 reticulate_1.18 spatstat.core_2.0-0 rsvd_1.0.3 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1 ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3 sass_0.3.1 uwot_0.1.10 dbplyr_2.1.0
## [55] deldir_0.2-10 here_1.0.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.0 rlang_0.4.10 reshape2_1.4.4 later_1.1.0.1 munsell_0.5.0 cellranger_1.1.0 tools_4.0.4 cli_2.3.1 generics_0.1.0 broom_0.7.5 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.31 fs_1.5.0 fitdistrplus_1.1-3 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-152 mime_0.10 xml2_1.3.2 compiler_4.0.4 rstudioapi_0.13 plotly_4.9.3 png_0.1-7 spatstat.utils_2.1-0 reprex_1.0.0 bslib_0.2.4 stringi_1.5.3 highr_0.8 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.3-2 vctrs_0.3.6 pillar_1.5.1 lifecycle_1.0.0 BiocManager_1.30.10 spatstat.geom_2.0-1 lmtest_0.9-38 jquerylib_0.1.3 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3 httpuv_1.5.5 patchwork_1.1.1 R6_2.5.0
## [109] bookdown_0.21 promises_1.2.0.1 KernSmooth_2.23-18 gridExtra_2.3 parallelly_1.24.0 codetools_0.2-18 MASS_7.3-53.1 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.1 sctransform_0.3.2 mgcv_1.8-34 parallel_4.0.4 hms_1.0.0 grid_4.0.4 rpart_4.1-15 rmarkdown_2.7 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.10